suppressPackageStartupMessages({
  library(tidyverse)
  library(irlba)
  library(DropletUtils)
  library(scater)
  library(scran)
  library(Seurat) ## just 4 loading the object
  library(SingleCellExperiment)
  library(miloR)
  library(patchwork)
  library(igraph)
  })
There were 24 warnings (use warnings() to see them)

Using data from Ramachandran et al. 2019 (GEO accessiion: GSE136103). The Seurat object was downloaded from https://datashare.is.ed.ac.uk/handle/10283/3433, upon request to the authors.

load("/nfs/team205/ed6/data/Ramachandran2019_liver/tissue.rdata")
## Convert to SingleCellExperiment
liver_sce <- SingleCellExperiment(assay = list(counts=tissue@raw.data, logcounts=tissue@data), 
                                  colData = tissue@meta.data)

Feature selection

dec_liver <- modelGeneVar(liver_sce)

# Visualizing the fit:
fit_liver <- metadata(dec_liver)
plot(fit_liver$mean, fit_liver$var, xlab="Mean of log-expression",
    ylab="Variance of log-expression")


hvgs <- getTopHVGs(dec_liver, n=3000)

Dim reduction

liver_sce <- runPCA(liver_sce, subset_row=hvgs, ncomponents=11)

plotPCA(liver_sce, colour_by="condition", ncomponents=3)

scater::plotUMAP(liver_sce, colour_by="condition", point_alpha=1,  point_size=0.5) 
Error in reducedDimNames(object) : object 'liver_sce' not found

Apply Milo

Let’s test for differential abundance between healthy and cirrhotic livers.

Sample neighbourhoods

Make design matrix

liver_meta <- as.tibble(colData(liver_milo)[,c("dataset","condition")]) 
`as.tibble()` is deprecated as of tibble 2.0.0.
Please use `as_tibble()` instead.
The signature and semantics have changed, see `?as_tibble`.
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.
liver_meta <- distinct(liver_meta) %>%
  mutate(condition=factor(condition, levels=c("Uninjured", "Cirrhotic"))) %>%
  column_to_rownames("dataset")

Test DA

## Save milo object and results
saveRDS(liver_milo,"/nfs/team205/ed6/data/Ramachandran2019_liver/tissue_milo.RDS")
write_csv(milo_res, "/nfs/team205/ed6/data/Ramachandran2019_liver/milo_results.csv")
liver_milo <- readRDS("/nfs/team205/ed6/data/Ramachandran2019_liver/tissue_milo.RDS")
milo_res <- read_csv("/nfs/team205/ed6/data/Ramachandran2019_liver/milo_results.csv")
Parsed with column specification:
cols(
  logFC = col_double(),
  logCPM = col_double(),
  F = col_double(),
  PValue = col_double(),
  FDR = col_double(),
  Nhood = col_double(),
  SpatialFDR = col_double(),
  annotation_indepth = col_character(),
  annotation_indepth_fraction = col_double(),
  annotation_lineage = col_character()
)
# 
# liver_milo_2 <- Milo(as(liver_milo, 'SingleCellExperiment'))
# 
# liver_milo_2@graph <- liver_milo@graph
# liver_milo@nhoodDistances <- liver_milo2@nhoodDistances
# liver_milo_2@graph <- liver_milo@graph

Visualize results

milo_res %>%
  ggplot(aes(logFC, -log10(SpatialFDR))) + 
  geom_point(size=0.4) +
  geom_hline(yintercept = -log10(0.1))

Check cell type composition of DA neighbourhoods

#' Add annotation of most frequent cell type per nhood to milo results table
add_nhood_coldata_to_res <- function(liver_milo, milo_res, coldata_col){
  nhood_counts <- sapply(seq_along(nhoods(liver_milo)), function(x) table(colData(liver_milo)[as.vector(nhoods(liver_milo)[[x]]), coldata_col]))
  nhood_counts <- t(nhood_counts)
  rownames(nhood_counts) <- seq_along(nhoods(liver_milo))
  max_val <- apply(nhood_counts, 1, function(x) colnames(nhood_counts)[which.max(x)])
  max_frac <- apply(nhood_counts, 1, function(x) max(x)/sum(x))
  milo_res[coldata_col] <- max_val
  milo_res[paste0(coldata_col, "_fraction")] <- max_frac
  return(milo_res)
}

milo_res <- add_nhood_coldata_to_res(liver_milo, milo_res, "annotation_indepth")
milo_res$annotation_lineage.x <- NULL
milo_res$annotation_lineage.y <- NULL
milo_res$annotation_lineage <- NULL
anno_df <- data.frame(liver_milo@colData) %>%
  distinct(annotation_lineage, annotation_indepth)
milo_res <- left_join(milo_res, anno_df, by="annotation_indepth")
rownames(milo_res) <- 1:nrow(milo_res)
Setting row names on a tibble is deprecated.

This shows that I can recover all the clusters where DA was detected in the original paper (see all the barplots for each lineage) and more! All in a single analysis, and without knowing where the subclusters are. Let’s bear in mind that positive logFC –> more cirrhotic, negative logFC —> more healthy

paper_DA <- list(cirrhotic=c("MPs (4)","MPs (5)",
                             "Endothelia (6)", "Endothelia (7)",
                             "Mes (3)",
                             "Tcells (2)",
                             "Myofibroblasts"
                             ),
                 healthy=c("MPs (7)",
                           "Endothelia (1)",
                           "Tcells (1)", "Tcells (3)","Tcells (1)",
                           "ILCs (1)"
                           )
                 )

expDA_df <- bind_rows(
  data.frame(annotation_indepth = paper_DA[["cirrhotic"]], pred_DA="cirrhotic"),
  data.frame(annotation_indepth = paper_DA[["healthy"]], pred_DA="healthy")
  )

pl1 <- milo_res %>%
  left_join(expDA_df) %>%
  mutate(is_signif = ifelse(SpatialFDR < 0.1, 1, 0)) %>%
  mutate(logFC_color = ifelse(is_signif==1, logFC, NA)) %>%
  arrange(annotation_lineage) %>%
  mutate(Nhood=factor(Nhood, levels=unique(Nhood))) %>%
  ggplot(aes(annotation_indepth, logFC, color=logFC_color)) +
  scale_color_gradient2() +
  guides(color="none") +
  xlab("annotation") + ylab("Log Fold Change") +
  ggbeeswarm::geom_quasirandom(alpha=1) +
  coord_flip() +
  facet_grid(annotation_lineage~., scales="free", space="free") +
  theme_bw(base_size=16) +
  theme(strip.text.y =  element_text(angle=0),
        axis.title.y = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(),
        )
Joining, by = "annotation_indepth"
pl2 <- milo_res %>%
  left_join(expDA_df) %>%
  # dplyr::filter(!is.na(pred_DA)) %>%
  group_by(annotation_indepth) %>%
  summarise(pred_DA=dplyr::first(pred_DA), annotation_lineage=dplyr::first(annotation_lineage)) %>%
  mutate(end=ifelse(pred_DA=="healthy", 0, 1),
         start=ifelse(pred_DA=="healthy", 1, 0)) %>%
  ggplot(aes(annotation_indepth, start, xend = annotation_indepth, yend = end, color=pred_DA)) +
  geom_segment(size=1,arrow=arrow(length = unit(0.1, "npc"), type="closed")) +
  coord_flip() +
  xlab("annotation") +
  facet_grid(annotation_lineage~.,
    # annotation_lineage~"Ramachandran et al.\nDA predictions", 
             scales="free", space="free") +
  # guides(color="none") +
  scale_color_brewer(palette="Set1", direction = -1, 
                     labels=c("enriched in cirrhotic", "enriched in healthy"),
                     na.translate = F,
                     name="Ramachandran et al.\nDA predictions") +
  guides(color=guide_legend(ncol = 1)) +
  theme_bw(base_size=16) +
  ylim(-0.1,1.1) +
  theme(strip.text.y = element_blank(),strip.text.x = element_text(angle=90),
        plot.margin = unit(c(0,0,0,0), "cm"), panel.grid = element_blank(),
        axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(),
        legend.position = "bottom")
Joining, by = "annotation_indepth"
`summarise()` ungrouping output (override with `.groups` argument)
(pl2 + pl1 + 
  plot_layout(widths=c(1,10), guides = "collect") & theme(legend.position = 'top', legend.justification = 0)) +
  ggsave("~/milo_output/liver_DAcomparison.pdf", width=8, height = 13)

Visualization with nhood graph

liver_milo <- buildNhoodGraph(liver_milo)
[1] "Calculating nhood adjacency...."

Close-up on Endothelial lineage

Differential expression between DA neighbourhoods

I will try this on the endothelial lineage. I want to merge overlapping nhoods with significant DA and the same direction of logFC, and then test for differential expression between cells in up and down nhoods (I guess you could also do up or down VS all the rest). This allows us to perform a comparison without further clustering.

subset.nhoods[1:(length(subset.nhoods)-1)]
  [1]   11   20   21   22   26   46   49   50   83  104  112  118  119  128  132  149  151  156  162  193  226  233  246  272  295  296  309  315  325  330
 [31]  360  364  365  371  377  382  385  386  388  391  395  401  420  422  438  443  449  479  480  481  485  499  500  505  527  531  536  537  538  551
 [61]  553  559  560  569  576  594  605  613  614  620  621  645  660  666  682  690  702  705  711  716  725  726  736  738  751  752  768  772  773  781
 [91]  782  799  806  810  812  821  837  849  852  865  870  923  926  928  970  986 1002 1006 1010 1013 1051 1053 1054 1061 1073 1086 1088 1093 1096 1097
[121] 1108 1118 1121 1126 1137 1147 1148 1152 1153 1171 1182 1188 1190 1196 1214 1217 1224 1232 1236 1238 1241 1251 1256 1258 1262 1263 1266 1268 1269 1275
[151] 1277 1285 1289 1306 1312 1313 1339 1343 1348 1352 1367 1379 1380 1391 1419 1422 1424 1427 1433 1436 1443 1450 1454 1465 1481 1497 1502 1524 1528 1538
[181] 1540 1550 1568 1572 1582 1597 1608 1609 1619 1623 1627 1629 1640 1652 1653 1654 1659 1661 1663 1669 1673 1675 1677 1679 1680 1685 1690 1700 1702 1709
[211] 1714 1718 1726 1730 1732 1737 1739 1749 1765 1766 1771 1773 1787 1804 1805 1817 1828 1860 1864 1869 1872 1878 1887 1905 1910 1922 1926 1927 1930 1936
[241] 1946 1959 1972 1984 1989 2004 2020 2026 2041 2045 2053 2072 2079 2116 2118 2119 2122 2126 2130 2136 2140 2141 2145 2152 2155 2157 2164 2168 2177 2185
[271] 2190 2193 2199 2207 2209 2211 2215 2219 2227 2229 2233 2258 2260 2278 2291 2292 2334 2351 2355 2373 2377 2394 2396 2405 2423 2424 2429 2449 2450 2455
[301] 2462 2463 2473 2489 2491 2495 2498 2501 2504 2507 2508 2517 2527 2534 2552 2557 2559 2563 2567 2584 2585 2587 2589 2593 2602 2606 2609 2615 2620 2622
[331] 2626 2632 2639 2646 2653 2657 2664 2666 2672 2673 2675 2677 2684 2685 2691 2692 2695
endo_milo <- buildNhoodGraph(endo_milo)
[1] "Calculating nhood adjacency...."
'CD59' %in% endo_hvgs
[1] FALSE
nhood_markers <- findNhoodMarkers(liver_milo, milo_res, overlap=1, assay="logcounts", return.groups = TRUE,
                                  subset.row = endo_hvgs,
                                  subset.nhoods = milo_res$annotation_lineage=="Endothelia")
Found 1404 DA neighbourhoods at FDR 10%
Nhoods aggregated into 2 groups
marker_genes <-
  nhood_markers$dge %>%
  pivot_longer(cols=str_subset(colnames(nhood_markers$dge), "adj.P.Val_")) %>%
  pivot_longer(cols=str_subset(colnames(nhood_markers$dge), "logFC_"), names_to='smp', values_to="logFC") %>%
   mutate(name=str_remove(name, "adj.P.Val_"), smp=str_remove(smp,"logFC_")) %>%
  dplyr::filter(smp==name) %>%
  dplyr::filter(smp==1) %>%
  mutate(logFC_dir=ifelse(logFC < 0, "down", 'up')) %>%
  group_by(logFC_dir) %>%
  top_n(n=50, -log10(value)) %>%
  ungroup() %>%
  # dplyr::filter(value < 0.01 & abs(logFC) > 0.02) %>%
  pull(GeneID) %>%
  unique()


nhood_markers$dge %>%
  pivot_longer(cols=str_subset(colnames(nhood_markers$dge), "adj.P.Val_")) %>%
  pivot_longer(cols=str_subset(colnames(nhood_markers$dge), "logFC_"), names_to='smp', values_to="logFC") %>%
  mutate(name=str_remove(name, "adj.P.Val_"), smp=str_remove(smp,"logFC_")) %>%
  dplyr::filter(smp==name) %>%
  mutate(color=ifelse(value<0.05, 1, 0),
         label=ifelse(GeneID %in% marker_genes, GeneID,NA)) %>%
  ggplot(aes(logFC, -log10(value), color=color)) +
  geom_point(size=0.1) +
  facet_wrap(name~., scales="free") +
  geom_hline(yintercept = -log10(0.01)) +
  ggrepel::geom_text_repel(aes(label=label))

marker_genes <- nhood_markers$dge %>%
  dplyr::filter(adj.P.Val_1 < 0.01) %>%
  pull(GeneID)

Customize plotting for paper figure

x <- liver_milo
da.res <- milo_res

cluster_features = TRUE
alpha = 0.05
scale_to_1 = TRUE
subset.nhoods = milo_res$annotation_lineage=="Endothelia"
features <- marker_genes

expr_mat <- nhoodExpression(x)[features,]
colnames(expr_mat) <- 1:length(nhoods(x))

## Get nhood expression matrix
if (!is.null(subset.nhoods)) {
  expr_mat <- expr_mat[,subset.nhoods, drop=FALSE]
}

if (!isFALSE(scale_to_1)) {
  expr_mat <- t(apply(expr_mat, 1, function(x) (x - min(x))/(max(x)- min(x))))
}

rownames(expr_mat) <- sub(pattern = "-", replacement = ".", rownames(expr_mat)) ## To avoid problems when converting to data.frame

pl_df <- data.frame(t(expr_mat)) %>%
  rownames_to_column("Nhood") %>%
  mutate(Nhood=as.double(Nhood)) %>%
  left_join(da.res, by="Nhood") %>%
  mutate(logFC_rank=percent_rank(logFC)) 

## Top plot: nhoods ranked by DA log FC
pl_top <- pl_df %>%
  mutate(is_signif = ifelse(SpatialFDR < alpha, paste0("SpatialFDR < ", alpha), NA)) %>%
  ggplot(aes(logFC_rank, logFC)) +
  geom_hline(yintercept = 0, linetype=2) +
  geom_point(size=0.2, color="grey") +
  geom_point(data=.%>% filter(!is.na(is_signif)), aes(color=is_signif), size=1) +
  theme_bw(base_size=16) +
  ylab("DA logFC") +
  scale_color_manual(values="red", name="") +
  scale_x_continuous(expand = c(0.01, 0)) +
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank())

## Bottom plot: gene expression heatmap
if (isTRUE(cluster_features)) {
  row.order <- hclust(dist(expr_mat))$order # clustering
  ordered_features <- rownames(expr_mat)[row.order]
} else {
  ordered_features <- rownames(expr_mat)
}

Quick GO term analysis

Let’s check the genes identified as markers for the disease subtypes. Are they significantly differentially expressed between DA neighbourhoods? Yes they are!

disease_endo_markers <- c("ACKR1", 'CD34',"VWA1")

data.frame(markers$negLogFC_2) %>%
  filter(FDR < 0.05) %>%
  .[disease_endo_markers,]

data.frame(markers$negLogFC_1) %>%
  filter(FDR < 0.05) %>%
  .[disease_endo_markers,]

Visualize some of the markers that differentiate DA neighbourhoods, by plotting the percent of cells expressing each gene in a nhood.

## Define plotting functions
.calculate_nhood_perc_expression <- function(milo, nhoods, gene){
  gene_cnts <- counts(milo)[gene,]
  perc_expr <- sapply(nhoods(milo)[nhoods], function(x) sum(gene_cnts[x]>0)/length(x))
  perc_expr <- setNames(perc_expr, nhoods)
  return(perc_expr)
  }

.plot_nhood_expression <- function(milo, nhoods, features){
  perc_expr_mat <- sapply(features, 
                          function(x) .calculate_nhood_perc_expression(milo, nhoods, x))
  
  pl_df <- data.frame(perc_expr_mat) %>%
    rownames_to_column("Nhood") %>%
    mutate(Nhood=as.double(Nhood)) %>%
    left_join(milo_res) %>%
    mutate(logFC_rank=rank(logFC)) 
  
  pl_top <- pl_df %>%
      mutate(is_signif = ifelse(SpatialFDR < 0.1, "SpatialFDR < 0.1", NA)) %>%
      ggplot(aes(logFC_rank, logFC)) +
      geom_hline(yintercept = 0, linetype=2) +
      geom_point(size=0.2) +
      geom_point(data=.%>% filter(!is.na(is_signif)), aes(color=is_signif), size=0.5) +
      theme_bw() +
      scale_color_manual(values="red", name="") +
      theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank())
    
  pl_bottom <- pl_df %>%
    pivot_longer(cols=features, names_to='feature', values_to="perc_expressed") %>%
    mutate(feature=factor(feature, levels=features)) %>%
    ggplot(aes(logFC_rank, feature, fill=perc_expressed)) + 
    geom_tile() +
    scale_fill_viridis_c(option="magma") +
    ggbio::theme_clear()
  
  (pl_top / pl_bottom) + plot_layout(heights = c(1,2))
}
endo_nhoods <- endo_res %>%  pull(Nhood)

## Select genes and sort by AUC
feats_neg2 <-
  data.frame(markers$negLogFC_2) %>% 
  top_n(50, - log10(FDR)) %>%
  arrange(AUC.posLogFC_1) %>%
  rownames()

.plot_nhood_expression(liver_milo, endo_nhoods, features=feats)

As described in the paper, we have that genes associated with extracellular matrix organization (e.g. VIM, ) are over expressed

## Select genes and sort by AUC
feats_neg1 <-
  data.frame(markers$negLogFC_1) %>% 
  top_n(50, - log10(FDR)) %>%
  arrange(AUC.posLogFC_1) %>%
  rownames()

.plot_nhood_expression(liver_milo, endo_nhoods, features=feats)
feats_neg1vsNeg2 <-
  data.frame(markers$negLogFC_1) %>% 
  top_n(50, - log10(FDR)) %>%
  arrange(AUC.negLogFC_2) %>%
  rownames()

.plot_nhood_expression(liver_milo, endo_nhoods, features=feats_neg1vsNeg2)

---
title: "Milo on disease VS healthy dataset"
output: html_notebook
---

```{r}
suppressPackageStartupMessages({
  library(tidyverse)
  library(irlba)
  library(DropletUtils)
  library(scater)
  library(scran)
  library(Seurat) ## just 4 loading the object
  library(SingleCellExperiment)
  library(miloR)
  library(patchwork)
  library(igraph)
  })
```

Using data from [Ramachandran et al. 2019](https://www.nature.com/articles/s41586-019-1631-3#Sec1) (GEO accessiion: GSE136103). The Seurat object was downloaded from https://datashare.is.ed.ac.uk/handle/10283/3433, upon request to the authors.

```{r}
load("/nfs/team205/ed6/data/Ramachandran2019_liver/tissue.rdata")
```

```{r}
## Convert to SingleCellExperiment
liver_sce <- SingleCellExperiment(assay = list(counts=tissue@raw.data, logcounts=tissue@data), 
                                  colData = tissue@meta.data)
```

### Feature selection

```{r}
dec_liver <- modelGeneVar(liver_sce)

# Visualizing the fit:
fit_liver <- metadata(dec_liver)
plot(fit_liver$mean, fit_liver$var, xlab="Mean of log-expression",
    ylab="Variance of log-expression")

hvgs <- getTopHVGs(dec_liver, n=3000)
```

### Dim reduction

```{r, fig.height=14, fig.width=14}
liver_sce <- runPCA(liver_sce, subset_row=hvgs, ncomponents=11)

plotPCA(liver_sce, colour_by="condition", ncomponents=3)
```

```{r, fig.height=8, fig.width=8}
liver_sce <- runUMAP(liver_sce, dimred="PCA", ncomponents=2)

scater::plotUMAP(liver_sce, colour_by="condition", point_alpha=1,  point_size=0.5) 
scater::plotUMAP(liver_sce, colour_by="dataset", point_alpha=0.3,  point_size=0.5)
scater::plotUMAP(liver_sce, colour_by="annotation_lineage", point_alpha=0.3,  point_size=0.5, text_by='annotation_lineage')
# scater::plotUMAP(liver_sce, colour_by='annotation_indepth', point_alpha=0.3,  point_size=0.5, text_by='annotation_indepth')
```

### Apply Milo

Let's test for differential abundance between healthy and cirrhotic livers.

#### Sample neighbourhoods

```{r}
liver_milo <- Milo(liver_sce)

## Build KNN graph
liver_milo <- buildGraph(liver_milo, d = 11, k=30)

## Compute neighbourhoods with refined sampling
liver_milo <- makeNhoods(liver_milo, k=30, d=11, prop = 0.05, refined=TRUE)
plotNhoodSizeHist(liver_milo, bins=150)
```

#### Make design matrix

```{r}
liver_meta <- as.tibble(colData(liver_milo)[,c("dataset","condition")]) 
liver_meta <- distinct(liver_meta) %>%
  mutate(condition=factor(condition, levels=c("Uninjured", "Cirrhotic"))) %>%
  column_to_rownames("dataset")
```

#### Test DA


```{r}
liver_milo <- countCells(liver_milo, samples = "dataset", meta.data = data.frame(colData(liver_milo)[,c("dataset","condition")]) )

milo_res <- testNhoods(liver_milo, design = ~ condition, design.df = liver_meta, fdr.weighting = "k-distance")
```

```{r}
## Save milo object and results
saveRDS(liver_milo,"/nfs/team205/ed6/data/Ramachandran2019_liver/tissue_milo.RDS")
write_csv(milo_res, "/nfs/team205/ed6/data/Ramachandran2019_liver/milo_results.csv")
```
```{r}
liver_milo <- readRDS("/nfs/team205/ed6/data/Ramachandran2019_liver/tissue_milo.RDS")
milo_res <- read_csv("/nfs/team205/ed6/data/Ramachandran2019_liver/milo_results.csv")
# 
# liver_milo_2 <- Milo(as(liver_milo, 'SingleCellExperiment'))
# 
# liver_milo_2@graph <- liver_milo@graph
# liver_milo@nhoodDistances <- liver_milo2@nhoodDistances
# liver_milo_2@graph <- liver_milo@graph
```



Visualize results
```{r}
milo_res %>%
  ggplot(aes(logFC, -log10(SpatialFDR))) + 
  geom_point(size=0.4) +
  geom_hline(yintercept = -log10(0.1))
```

Check cell type composition of DA neighbourhoods

```{r, fig.width=9, fig.height=10}
#' Add annotation of most frequent cell type per nhood to milo results table
add_nhood_coldata_to_res <- function(liver_milo, milo_res, coldata_col){
  nhood_counts <- sapply(seq_along(nhoods(liver_milo)), function(x) table(colData(liver_milo)[as.vector(nhoods(liver_milo)[[x]]), coldata_col]))
  nhood_counts <- t(nhood_counts)
  rownames(nhood_counts) <- seq_along(nhoods(liver_milo))
  max_val <- apply(nhood_counts, 1, function(x) colnames(nhood_counts)[which.max(x)])
  max_frac <- apply(nhood_counts, 1, function(x) max(x)/sum(x))
  milo_res[coldata_col] <- max_val
  milo_res[paste0(coldata_col, "_fraction")] <- max_frac
  return(milo_res)
}

milo_res <- add_nhood_coldata_to_res(liver_milo, milo_res, "annotation_indepth")
milo_res$annotation_lineage.x <- NULL
milo_res$annotation_lineage.y <- NULL
milo_res$annotation_lineage <- NULL
anno_df <- data.frame(liver_milo@colData) %>%
  distinct(annotation_lineage, annotation_indepth)
milo_res <- left_join(milo_res, anno_df, by="annotation_indepth")

```

This shows that I can recover all the clusters where DA was detected in the original paper (see all the barplots for each lineage) and more! All in a single analysis, and without knowing where the subclusters are. Let's bear in mind that positive logFC --> more cirrhotic, negative logFC ---> more healthy

```{r, fig.width=10, fig.height=10}
paper_DA <- list(cirrhotic=c("MPs (4)","MPs (5)",
                             "Endothelia (6)", "Endothelia (7)",
                             "Mes (3)",
                             "Tcells (2)",
                             "Myofibroblasts"
                             ),
                 healthy=c("MPs (7)",
                           "Endothelia (1)",
                           "Tcells (1)", "Tcells (3)","Tcells (1)",
                           "ILCs (1)"
                           )
                 )

expDA_df <- bind_rows(
  data.frame(annotation_indepth = paper_DA[["cirrhotic"]], pred_DA="cirrhotic"),
  data.frame(annotation_indepth = paper_DA[["healthy"]], pred_DA="healthy")
  )

pl1 <- milo_res %>%
  left_join(expDA_df) %>%
  mutate(is_signif = ifelse(SpatialFDR < 0.1, 1, 0)) %>%
  mutate(logFC_color = ifelse(is_signif==1, logFC, NA)) %>%
  arrange(annotation_lineage) %>%
  mutate(Nhood=factor(Nhood, levels=unique(Nhood))) %>%
  ggplot(aes(annotation_indepth, logFC, color=logFC_color)) +
  scale_color_gradient2() +
  guides(color="none") +
  xlab("annotation") + ylab("Log Fold Change") +
  ggbeeswarm::geom_quasirandom(alpha=1) +
  coord_flip() +
  facet_grid(annotation_lineage~., scales="free", space="free") +
  theme_bw(base_size=16) +
  theme(strip.text.y =  element_text(angle=0),
        axis.title.y = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(),
        )

pl2 <- milo_res %>%
  left_join(expDA_df) %>%
  # dplyr::filter(!is.na(pred_DA)) %>%
  group_by(annotation_indepth) %>%
  summarise(pred_DA=dplyr::first(pred_DA), annotation_lineage=dplyr::first(annotation_lineage)) %>%
  mutate(end=ifelse(pred_DA=="healthy", 0, 1),
         start=ifelse(pred_DA=="healthy", 1, 0)) %>%
  ggplot(aes(annotation_indepth, start, xend = annotation_indepth, yend = end, color=pred_DA)) +
  geom_segment(size=1,arrow=arrow(length = unit(0.1, "npc"), type="closed")) +
  coord_flip() +
  xlab("annotation") +
  facet_grid(annotation_lineage~.,
    # annotation_lineage~"Ramachandran et al.\nDA predictions", 
             scales="free", space="free") +
  # guides(color="none") +
  scale_color_brewer(palette="Set1", direction = -1, 
                     labels=c("enriched in cirrhotic", "enriched in healthy"),
                     na.translate = F,
                     name="Ramachandran et al.\nDA predictions") +
  guides(color=guide_legend(ncol = 1)) +
  theme_bw(base_size=16) +
  ylim(-0.1,1.1) +
  theme(strip.text.y = element_blank(),strip.text.x = element_text(angle=90),
        plot.margin = unit(c(0,0,0,0), "cm"), panel.grid = element_blank(),
        axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(),
        legend.position = "bottom")

(pl2 + pl1 + 
  plot_layout(widths=c(1,10), guides = "collect") & theme(legend.position = 'top', legend.justification = 0)) +
  ggsave("~/milo_output/liver_DAcomparison.pdf", width=8, height = 13)
```

### Visualization with nhood graph

```{r}
liver_milo <- buildNhoodGraph(liver_milo)
```
```{r, fig.width=15, fig.height=10}
colourCount = length(unique(liver_milo$annotation_lineage))
getPalette = colorRampPalette(brewer.pal(9, "Spectral"))

umap_df <- data.frame(reducedDim(liver_milo, "UMAP"))
colnames(umap_df) <- c("UMAP_1", "UMAP_2")

umap1 <- cbind(umap_df, annotation_lineage=liver_milo$annotation_lineage) %>%
  ggplot(aes(UMAP_1, UMAP_2, color=as.character(annotation_lineage))) +
  geom_point(size=0.5) +
  ggrepel::geom_text_repel(data = . %>% 
              group_by(annotation_lineage) %>% 
              summarise(UMAP_1=mean(UMAP_1), UMAP_2=mean(UMAP_2)),
            aes(label=annotation_lineage), color="black", size=4
            ) +
  scale_color_manual(values=getPalette(colourCount)) +
  guides(color="none") +
  xlab("UMAP1") + ylab("UMAP2") +
  coord_fixed() +
  theme_classic(base_size = 16) +
  theme(axis.text = element_blank(), axis.ticks = element_blank())

umap2 <- cbind(umap_df, condition=as.character(liver_milo$condition)) %>%
  ggplot(aes(UMAP_1, UMAP_2, color=condition)) +
  geom_point(size=0.5) +
  scale_color_brewer(palette="Set1", name='') +
  xlab("UMAP1") + ylab("UMAP2") +
  coord_fixed() +
  guides(color=guide_legend(override.aes = list(size=2))) +
  theme_classic(base_size = 16) +
  theme(axis.text = element_blank(), axis.ticks = element_blank(), legend.position=c(0.9,0.9))


nh_graph_pl <- plotNhoodGraphDA(liver_milo, milo_res, alpha = 0.05) +
  coord_fixed()

((umap1 / umap2 + plot_layout()) | nh_graph_pl) + 
  plot_layout(widths = c(1,2)) +
  ggsave("~/milo_output/liver_embedding.pdf", width=15, height = 10)
```


## Close-up on Endothelial lineage


<!-- Compute HV genes for subset -->
<!-- ```{r} -->
<!-- dec_endo <- modelGeneVar(endo_milo) -->
<!-- endo_hvgs <- getTopHVGs(dec_endo, n=3000) -->
<!-- ``` -->

```{r, fig.width=10, fig.height=8}
# endo_milo <- scater::runPCA(endo_milo, subset_row=endo_hvgs, ncomponents=11)
endo_milo <- scater::runUMAP(liver_milo[,liver_milo$annotation_lineage=="Endothelia"],  dimred='PCA')

umap_df <- data.frame(reducedDim(endo_milo, "UMAP"))
colnames(umap_df) <- c("UMAP_1", "UMAP_2")

endo_umap <- cbind(umap_df, condition=endo_milo$condition) %>%
   ggplot(aes(UMAP_1, UMAP_2, color=condition)) +
  geom_point(size=0.5) +
  scale_color_brewer(palette="Set1", name='') +
  xlab("UMAP1") + ylab("UMAP2") +
  coord_fixed() +
  guides(color="none") +
  facet_wrap(condition~., ncol=1) +
  theme_nothing() +
  theme(axis.text = element_blank(), axis.ticks = element_blank(), legend.position=c(0.9,0.9),
        strip.background = element_rect(color=NA), strip.text = element_text(size=16))

```
<!-- c -->
<!-- ```{r} -->
<!-- nhoodGraph(endo_milo) <- subgraph(nhoodGraph(liver_milo), which(milo_res$annotation_lineage=="Endothelia")) -->
<!-- subset_nh_ixs <- colnames(liver_milo)[unlist(nhoodIndex(liver_milo)[which(milo_res$annotation_lineage=="Endothelia")])] -->

<!-- which(!subset_nh_ixs %in% colnames(endo_milo)) -->
<!-- endo_milo[,subset_nh_ixs] -->

<!-- new_ixs <- 1:ncol(endo_milo) -->
<!-- setNames(new_ixs, old_ixs) -->

<!-- unlist(nhoodIndex(endo_milo)[which(milo_res$annotation_lineage=="Endothelia")]) -->
<!-- nhoodIndex(endo_milo) -->
<!-- vertex_attr(nhoodGraph(endo_milo), "name") <-  -->
<!-- ``` -->


```{r, fig.width=8, fig.height=4}
liver_milo2 <- liver_milo
subset.nhoods <- which(milo_res$annotation_lineage=="Endothelia")

reducedDim(liver_milo2, "UMAP")[colnames(endo_milo),] <- reducedDim(endo_milo, "UMAP") 

endo_gr <- plotNhoodGraphDA(liver_milo2, milo_res, 
                 subset.nhoods =subset.nhoods[1:(length(subset.nhoods)-1)], alpha = 1)


((endo_umap + endo_gr ) + 
  plot_layout(widths = c(1,2), 
                guides = "collect"
                )) +
  ggsave("~/milo_output/liver_endoGraph.pdf", width=9, height = 5)  
```


### Differential expression between DA neighbourhoods

I will try this on the endothelial lineage. I want to merge overlapping nhoods with significant DA and the same direction of logFC, and then test for differential expression between cells in up and down nhoods (I guess you could also do up or down VS all the rest). This allows us to perform a comparison without further clustering.

```{r}
endo_milo <- liver_milo[,which(liver_milo$annotation_lineage=="Endothelia")]
endo_res <- dplyr::filter(milo_res, annotation_lineage=="Endothelia")

rownames(endo_res) <- rownames(milo_res)[which(milo_res$annotation_lineage=="Endothelia")]
```

```{r}
endo_milo <- buildNhoodGraph(endo_milo)

# plotNhoodGraph(liver_milo[,w(liver_milo$annotation_lineage=="Endothelia")], colour_by = 'annotation_lineage')

plotNhoodGraphDA(endo_milo, endo_milo_res)
```


<!-- ```{r} -->
<!-- endo_milo <- runPCA(endo_milo) -->
<!-- endo_milo <- buildGraph(endo_milo) -->

<!-- endo_milo <- runUMAP(endo_milo) -->
<!-- plotUMAP(endo_milo, colour_by="annotation_indepth") -->
<!-- ``` -->
<!-- ```{r} -->
<!-- endo_milo <- makeNhoods(endo_milo) -->
<!-- endo_milo <- countCells(endo_milo, samples = "dataset", meta.data = data.frame(colData(endo_milo)[,c("dataset","condition")]) ) -->

<!-- endo_milo_res <- testNhoods(endo_milo, design = ~ condition, design.df = liver_meta, fdr.weighting = "k-distance") -->

<!-- ``` -->

```{r}
dec_liver <- modelGeneVar(liver_milo)

# Visualizing the fit:
fit_liver <- metadata(dec_liver)
plot(fit_liver$mean, fit_liver$var, xlab="Mean of log-expression",
    ylab="Variance of log-expression")

hvgs <- getTopHVGs(dec_liver, n=3000)

plot(fit_liver$mean, fit_liver$var, xlab="Mean of log-expression",
     col = ifelse(names(fit_liver$mean) %in% hvgs, 'red', "black"),
    ylab="Variance of log-expression")

## keep HV genes expressed in at least 5% of endothelial cells
endo_hvg_cnts <- counts(liver_milo)[hvgs,liver_milo$annotation_lineage=="Endothelia"]
endo_hvgs <- hvgs[(rowSums(endo_hvg_cnts!=0) > (ncol(endo_hvg_cnts)/100)*5) & (rowSums(endo_hvg_cnts!=0) < (ncol(endo_hvg_cnts)/100)*80)] 
```




```{r}
rownames(milo_res) <- names(nhoods(liver_milo)) ## If I don't set the nhood index as name the grouping doesn't
nhood_markers <- findNhoodMarkers(liver_milo, milo_res, overlap=1, assay="logcounts", return.groups = TRUE,
                                  subset.row = endo_hvgs,
                                  subset.nhoods = milo_res$annotation_lineage=="Endothelia")

```



```{r, fig.height=8, fig.width=17}
marker_genes <-
  nhood_markers$dge %>%
  pivot_longer(cols=str_subset(colnames(nhood_markers$dge), "adj.P.Val_")) %>%
  pivot_longer(cols=str_subset(colnames(nhood_markers$dge), "logFC_"), names_to='smp', values_to="logFC") %>%
   mutate(name=str_remove(name, "adj.P.Val_"), smp=str_remove(smp,"logFC_")) %>%
  dplyr::filter(smp==name) %>%
  dplyr::filter(smp==1) %>%
  mutate(logFC_dir=ifelse(logFC < 0, "down", 'up')) %>%
  group_by(logFC_dir) %>%
  top_n(n=50, -log10(value)) %>%
  ungroup() %>%
  # dplyr::filter(value < 0.01 & abs(logFC) > 0.02) %>%
  pull(GeneID) %>%
  unique()


nhood_markers$dge %>%
  pivot_longer(cols=str_subset(colnames(nhood_markers$dge), "adj.P.Val_")) %>%
  pivot_longer(cols=str_subset(colnames(nhood_markers$dge), "logFC_"), names_to='smp', values_to="logFC") %>%
  mutate(name=str_remove(name, "adj.P.Val_"), smp=str_remove(smp,"logFC_")) %>%
  dplyr::filter(smp==name) %>%
  mutate(color=ifelse(value<0.05, 1, 0),
         label=ifelse(GeneID %in% marker_genes, GeneID,NA)) %>%
  ggplot(aes(logFC, -log10(value), color=color)) +
  geom_point(size=0.1) +
  facet_wrap(name~., scales="free") +
  geom_hline(yintercept = -log10(0.01)) +
  ggrepel::geom_text_repel(aes(label=label))
```
```{r}
marker_genes <- nhood_markers$dge %>%
  dplyr::filter(adj.P.Val_1 < 0.01) %>%
  pull(GeneID)
```

```{r, fig.height=18, fig.width=12}
liver_milo <- calcNhoodExpression(liver_milo, subset.row = marker_genes)

plotNhoodExpressionDA(liver_milo, milo_res, marker_genes, cluster_features = TRUE, alpha = 0.05,
                      scale_to_1 = TRUE,
                      subset.nhoods = milo_res$annotation_lineage=="Endothelia"
                      )
```


Customize plotting for paper figure

```{r}
x <- liver_milo
da.res <- milo_res

cluster_features = TRUE
alpha = 0.05
scale_to_1 = TRUE
subset.nhoods = milo_res$annotation_lineage=="Endothelia"
features <- marker_genes

expr_mat <- nhoodExpression(x)[features,]
colnames(expr_mat) <- 1:length(nhoods(x))

## Get nhood expression matrix
if (!is.null(subset.nhoods)) {
  expr_mat <- expr_mat[,subset.nhoods, drop=FALSE]
}

if (!isFALSE(scale_to_1)) {
  expr_mat <- t(apply(expr_mat, 1, function(x) (x - min(x))/(max(x)- min(x))))
}

rownames(expr_mat) <- sub(pattern = "-", replacement = ".", rownames(expr_mat)) ## To avoid problems when converting to data.frame

pl_df <- data.frame(t(expr_mat)) %>%
  rownames_to_column("Nhood") %>%
  mutate(Nhood=as.double(Nhood)) %>%
  left_join(da.res, by="Nhood") %>%
  mutate(logFC_rank=percent_rank(logFC)) 

## Top plot: nhoods ranked by DA log FC
pl_top <- pl_df %>%
  mutate(is_signif = ifelse(SpatialFDR < alpha, paste0("SpatialFDR < ", alpha), NA)) %>%
  ggplot(aes(logFC_rank, logFC)) +
  geom_hline(yintercept = 0, linetype=2) +
  geom_point(size=0.2, color="grey") +
  geom_point(data=.%>% filter(!is.na(is_signif)), aes(color=is_signif), size=1) +
  theme_bw(base_size=16) +
  ylab("DA logFC") +
  scale_color_manual(values="red", name="") +
  scale_x_continuous(expand = c(0.01, 0)) +
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank())

## Bottom plot: gene expression heatmap
if (isTRUE(cluster_features)) {
  row.order <- hclust(dist(expr_mat))$order # clustering
  ordered_features <- rownames(expr_mat)[row.order]
} else {
  ordered_features <- rownames(expr_mat)
}


```


```{r, fig.height=12, fig.width=10}
highlight_genes <- c("PLVAP", "SOX14", "VWA1", "ACKR1",
                     "CLEC4G", "CLEC4M", "STAB2", "MRC1",
                     "CD14", "CCL21", "SOX17", "WNT2", "RSPO3", "AIF1L",
                     "PROX1", "PDPN","CPE", "CD320")

# ## Ordering features by peak in expression
# ordered_features <-
#   pl_df %>%
#   pivot_longer(cols=rownames(expr_mat), names_to='feature', values_to="avg_expr") %>%
#   group_by(feature) %>%
#   arrange(logFC_rank) %>%
#   mutate(avg_expr = zoo::rollmean(avg_expr,10, fill = NA)) %>%
#   summarise(max_point=logFC_rank[which(avg_expr==max(avg_expr, na.rm=TRUE))]) %>%
#   arrange(max_point) %>%
#   pull(feature)
#   
# ## Ordering features by fc in marker test
# ordered_features <- nhood_markers$dge %>%
#   dplyr::filter(adj.P.Val_1 < 0.01) %>%
#   arrange(logFC_1) %>%
#   unique() %>%
#   pull(GeneID)

pl_bottom <- pl_df %>%
  pivot_longer(cols=rownames(expr_mat), names_to='feature', values_to="avg_expr") %>%
  mutate(feature=factor(feature, levels=ordered_features)) %>% 
  mutate(label=ifelse(feature %in% highlight_genes, as.character(feature), NA)) %>%
  ggplot(aes(logFC_rank, feature, fill=avg_expr)) + 
  geom_tile() +
  scale_fill_viridis_c(option="magma", name="Scaled NH\nexpression") +
  xlab("Endothelial Neighbourhoods") + ylab("DE genes") +
  scale_x_continuous(expand = c(0.01, 0),
                     # limits = c(0, max(pl_df$logFC_rank) + 0.2)
                     ) +
  coord_cartesian(clip="off") +
  ggrepel::geom_text_repel(data=. %>% dplyr::filter(!is.na(label)) %>% 
              group_by(label) %>%
              summarise(logFC_rank=max(logFC_rank), avg_expr=mean(avg_expr), feature=dplyr::first(feature)),
            aes(label=label, x=logFC_rank), size=4, xlim = c(max(pl_df$logFC_rank) + 0.01, max(pl_df$logFC_rank) + 0.02),
            min.segment.length = 0, seed=42
            ) +
  theme_classic(base_size = 16) +
  theme(axis.text.x = element_blank(), axis.line.x = element_blank(), axis.ticks.x = element_blank(),
        axis.text.y = element_blank(), axis.line.y = element_blank(), axis.ticks.y = element_blank(),
        panel.spacing = margin(2, 2, 2, 2, "cm"),
        legend.margin=margin(0,0,0,0),
        legend.box.margin=margin(10,10,10,10)
        )

## Assemble plot
(pl_top / pl_bottom) + plot_layout(heights = c(1,4)) +
  ggsave("~/milo_output/liver_DEG_heatmap.pdf", width=9, height = 9)

```

#### Quick GO term analysis

```{r}
install.packages("enrichR")
library(enrichR)

dbs <- listEnrichrDbs()
websiteLive <- TRUE
if (is.null(dbs)) websiteLive <- FALSE
if (websiteLive) head(dbs)

dbs <- c("GO_Molecular_Function_2015", "GO_Cellular_Component_2015", "GO_Biological_Process_2015")
go_marker_genes <- enrichr(marker_genes, dbs, )
go_all_genes <- enrichr(endo_hvgs, dbs)

ref_terms <- go_all_genes$GO_Biological_Process_2015$Term[go_all_genes$GO_Biological_Process_2015$Adjusted.P.value < 0.01]

go_marker_genes$GO_Biological_Process_2015[go_marker_genes$GO_Biological_Process_2015$Adjusted.P.value < 0.01,] %>%
  filter(!Term %in% ref_terms) %>%
  top_n(30, -log10(Adjusted.P.value)) %>%
  mutate(Term=factor(Term, levels=rev(unique(Term)))) %>%
  ggplot(aes(Term, -log10(Adjusted.P.value))) +
  geom_point() +
  coord_flip() +
  xlab("GO BIological Function") + ylab("-log10(Adj. p-value)") +
  theme_bw(base_size=14) +
  ggsave("~/milo_output/liver_endoDEG_GOenrich.pdf", width=9, height = 5)
```


Let's check the genes identified as markers for the disease subtypes. Are they significantly differentially expressed between DA neighbourhoods? Yes they are!

```{r}
disease_endo_markers <- c("ACKR1", 'CD34',"VWA1")

data.frame(markers$negLogFC_2) %>%
  filter(FDR < 0.05) %>%
  .[disease_endo_markers,]

data.frame(markers$negLogFC_1) %>%
  filter(FDR < 0.05) %>%
  .[disease_endo_markers,]
```

Visualize some of the markers that differentiate DA neighbourhoods, by plotting the percent of cells expressing each gene in a nhood.

```{r}
## Define plotting functions
.calculate_nhood_perc_expression <- function(milo, nhoods, gene){
  gene_cnts <- counts(milo)[gene,]
  perc_expr <- sapply(nhoods(milo)[nhoods], function(x) sum(gene_cnts[x]>0)/length(x))
  perc_expr <- setNames(perc_expr, nhoods)
  return(perc_expr)
  }

.plot_nhood_expression <- function(milo, nhoods, features){
  perc_expr_mat <- sapply(features, 
                          function(x) .calculate_nhood_perc_expression(milo, nhoods, x))
  
  pl_df <- data.frame(perc_expr_mat) %>%
    rownames_to_column("Nhood") %>%
    mutate(Nhood=as.double(Nhood)) %>%
    left_join(milo_res) %>%
    mutate(logFC_rank=rank(logFC)) 
  
  pl_top <- pl_df %>%
      mutate(is_signif = ifelse(SpatialFDR < 0.1, "SpatialFDR < 0.1", NA)) %>%
      ggplot(aes(logFC_rank, logFC)) +
      geom_hline(yintercept = 0, linetype=2) +
      geom_point(size=0.2) +
      geom_point(data=.%>% filter(!is.na(is_signif)), aes(color=is_signif), size=0.5) +
      theme_bw() +
      scale_color_manual(values="red", name="") +
      theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank())
    
  pl_bottom <- pl_df %>%
    pivot_longer(cols=features, names_to='feature', values_to="perc_expressed") %>%
    mutate(feature=factor(feature, levels=features)) %>%
    ggplot(aes(logFC_rank, feature, fill=perc_expressed)) + 
    geom_tile() +
    scale_fill_viridis_c(option="magma") +
    ggbio::theme_clear()
  
  (pl_top / pl_bottom) + plot_layout(heights = c(1,2))
}
```


```{r, fig.width=10, fig.height=12}
endo_nhoods <- endo_res %>%  pull(Nhood)

## Select genes and sort by AUC
feats_neg2 <-
  data.frame(markers$negLogFC_2) %>% 
  top_n(50, - log10(FDR)) %>%
  arrange(AUC.posLogFC_1) %>%
  rownames()

.plot_nhood_expression(liver_milo, endo_nhoods, features=feats)
```

As described in the paper, we have that genes associated with extracellular matrix organization (e.g. VIM, ) are over expressed 


```{r, fig.width=10, fig.height=7}
## Select genes and sort by AUC
feats_neg1 <-
  data.frame(markers$negLogFC_1) %>% 
  top_n(50, - log10(FDR)) %>%
  arrange(AUC.posLogFC_1) %>%
  rownames()

.plot_nhood_expression(liver_milo, endo_nhoods, features=feats)

```

```{r, fig.width=10, fig.height=7}
feats_neg1vsNeg2 <-
  data.frame(markers$negLogFC_1) %>% 
  top_n(50, - log10(FDR)) %>%
  arrange(AUC.negLogFC_2) %>%
  rownames()

.plot_nhood_expression(liver_milo, endo_nhoods, features=feats_neg1vsNeg2)
```


<!-- Look just at Endothelia (5) where you have both positive and negative fold-changes -->

<!-- ```{r} -->
<!-- endo5_nhoods <- milo_res %>%  -->
<!--   filter(annotation_indepth=="Endothelia (5)" & annotation_indepth_fraction > 0.7) %>% -->
<!--   pull(Nhood) -->

<!-- perc_expr_mat <- sapply(features,  -->
<!--                         function(x) .calculate_nhood_perc_expression(liver_milo, endo5_nhoods, x)) -->

<!-- pl_df <- data.frame(perc_expr_mat) %>% -->
<!--   rownames_to_column("Nhood") %>% -->
<!--   mutate(Nhood=as.double(Nhood)) %>% -->
<!--   left_join(milo_res) %>% -->
<!--   mutate(logFC_rank=rank(logFC))  -->

<!-- pl_top <- pl_df %>% -->
<!--     mutate(is_signif = ifelse(SpatialFDR < 0.1, "SpatialFDR < 0.1", NA)) %>% -->
<!--     ggplot(aes(logFC_rank, logFC)) + -->
<!--     geom_hline(yintercept = 0, linetype=2) + -->
<!--     geom_point(size=0.2) + -->
<!--     geom_point(data=.%>% filter(!is.na(is_signif)), aes(color=is_signif), size=0.5) + -->
<!--     theme_bw() + -->
<!--     scale_color_manual(values="red", name="") + -->
<!--     theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank()) -->

<!-- pl_bottom <- pl_df %>% -->
<!--   pivot_longer(cols=features, names_to='feature', values_to="perc_expressed") %>% -->
<!--   ggplot(aes(logFC_rank, feature, fill=perc_expressed)) +  -->
<!--   geom_tile() + -->
<!--   scale_fill_viridis_c(option="magma") + -->
<!--   theme_clear() -->

<!-- (pl_top / pl_bottom) + plot_layout(heights = c(1,2)) -->
<!-- ``` -->

<!-- How to find association de novo in Endo 5? -->

<!-- ```{r, fig.height=8, fig.width=8} -->
<!-- ## Find most highly variable genes in this cluster -->
<!-- endo5_milo <- liver_milo[,which(colData(liver_milo)[["annotation_indepth"]]=="Endothelia (5)")] -->
<!-- dec_endo5 <- modelGeneVar(endo5_milo) -->
<!-- endo5_hvgs <- getTopHVGs(dec_endo5, n=1000) -->

<!-- perc_expr_mat <- sapply(endo5_hvgs, function(x) .calculate_nhood_perc_expression(liver_milo, endo5_nhoods, x)) -->

<!-- milo_res_endo5 <- milo_res[which(milo_res$Nhood %in% endo5_nhoods),] -->

<!-- fc_cor <- apply(perc_expr_mat, 2, function(x) Hmisc::rcorr(x, milo_res_endo5$logFC)$r[1,2]) -->
<!-- fc_cor_pval <- apply(perc_expr_mat, 2, function(x) Hmisc::rcorr(x, milo_res_endo5$logFC)$P[1,2]) -->

<!-- cor_feats <- names(which(abs(fc_cor) > 0.6 & abs(fc_cor_pval) < 0.05)) -->
<!-- cor_feats_ordered <- cor_feats[order(fc_cor[cor_feats])] -->

<!-- pl_df <- data.frame(perc_expr_mat[,cor_feats]) %>% -->
<!--   rownames_to_column("Nhood") %>% -->
<!--   mutate(Nhood=as.double(Nhood)) %>% -->
<!--   left_join(milo_res) %>% -->
<!--   mutate(logFC_rank=rank(logFC))  -->

<!-- pl_top <- pl_df %>% -->
<!--     mutate(is_signif = ifelse(SpatialFDR < 0.1, "SpatialFDR < 0.1", NA)) %>% -->
<!--     ggplot(aes(logFC_rank, logFC)) + -->
<!--     geom_hline(yintercept = 0, linetype=2) + -->
<!--     geom_point(size=0.2) + -->
<!--     geom_point(data=.%>% filter(!is.na(is_signif)), aes(color=is_signif), size=0.5) + -->
<!--     theme_bw() + -->
<!--     scale_color_manual(values="red", name="") + -->
<!--     theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank()) -->

<!-- pl_bottom <- pl_df %>% -->
<!--   pivot_longer(cols=str_replace(cor_feats_ordered, "-", "."), names_to='feature', values_to="perc_expressed") %>% -->
<!--   mutate(feature=factor(feature, levels=str_replace(cor_feats_ordered, "-", "."))) %>% -->
<!--   ggplot(aes(logFC_rank, feature, fill=perc_expressed)) +  -->
<!--   geom_tile() + -->
<!--   scale_fill_viridis_c(option="magma") + -->
<!--   theme_clear() -->

<!-- (pl_top / pl_bottom) + plot_layout(heights = c(1,2)) -->
<!-- ``` -->



<!-- ```{r} -->
<!-- ## Run common PCA -->
<!-- merged_cnts <- cbind(nhoodExpression(endo_milo), logcounts(endo_milo)[hvgs,]) -->
<!-- merged_cnts_scaled <- t(scale(t(merged_cnts))) -->
<!-- merged_pca <- BiocSingular::runPCA(t(merged_cnts_scaled), rank=30, center=FALSE) -->
<!-- pca_mat <- rbind(merged_pca$x[(ncol(endo_milo)+1):(ncol(endo_milo)+(length(nhoods(endo_milo)))),], merged_pca$x[colnames(endo_milo),]) -->
<!-- ## Add to slot nhoodsReducedDim -->
<!-- nhoodReducedDim(endo_milo, "PCA") <- pca_mat -->

<!-- ## Run UMAP on joint PCA -->
<!-- umap_out <- uwot::umap(nhoodReducedDim(endo_milo, "PCA"), n_neighbors = 20, n_components = 2, scale=FALSE) -->
<!-- colnames(umap_out) <- c("UMAP_1", "UMAP_2") -->
<!-- nhoodReducedDim(endo_milo, "UMAP") <- umap_out -->

<!-- ``` -->

<!-- ```{r} -->
<!-- split_by=NULL -->
<!-- ## Join test results and dimensionality reductions -->
<!-- rdim_df <- data.frame(nhoodReducedDim(endo_milo, "UMAP")[,c(1,2)]) -->
<!-- colnames(rdim_df) <- c('X','Y') -->

<!-- n_nhoods <- length(nhoods(endo_milo)) -->
<!-- rdim_df[,"Nhood"] <- ifelse(1:nrow(rdim_df) %in% c(1:n_nhoods), c(1:n_nhoods), NA) -->
<!-- viz_df  <- left_join(rdim_df, milo_res[which(milo_res$annotation_lineage=="Endothelia"),], by="Nhood") -->
<!-- viz_df[["nhIndex"]] <- unlist(ifelse(!is.na(viz_df$Nhood), nhoodIndex(endo_milo)[viz_df$Nhood],NA)) -->
<!-- viz_df[is.na(viz_df["nhIndex"]),'nhIndex'] <- 1:ncol(endo_milo) # Add index also to single-cells -->

<!-- if (!is.null(split_by)){ -->
<!--   split_df <- data.frame(split_by=colData(endo_milo)[,split_by]) -->
<!--   split_df[,"nhIndex"] <- 1:nrow(split_df) -->
<!--   viz_df  <- left_join(viz_df, split_df, by="nhIndex") -->
<!-- } -->

<!-- filter_alpha=0.1 -->
<!-- ## Filter significant DA nhoods -->
<!-- if (!is.null(filter_alpha)) { -->
<!--   if (filter_alpha > 0) { -->
<!--     viz_df <- mutate(viz_df, logFC = ifelse(SpatialFDR > filter_alpha, NA, logFC)) -->
<!--   } -->
<!-- } -->

<!-- ## Plot -->
<!-- pt_size=1 -->
<!-- nhood_reduced_dims="UMAP" -->
<!-- components=c(1,2) -->
<!--   pl <- -->
<!--     ggplot(data = arrange(viz_df, abs(logFC)), -->
<!--            aes(X, Y)) + -->
<!--     geom_point(aes(color = ''), size = pt_size / 3, alpha = 0.5) + -->
<!--     geom_point( -->
<!--       data = . %>% filter(!is.na(SpatialFDR)), -->
<!--       aes(fill = logFC), -->
<!--       size = pt_size, -->
<!--       stroke = 0.1, -->
<!--       # colour="black", -->
<!--       shape = 21 -->
<!--     ) + -->
<!--     scale_fill_gradient2( -->
<!--       midpoint = 0, -->
<!--       high = "red", -->
<!--       low = "blue", -->
<!--       name = "log-FC" -->
<!--     ) + -->
<!--     xlab(paste(nhood_reduced_dims, components[1], sep="_")) + -->
<!--     ylab(paste(nhood_reduced_dims, components[2], sep="_")) -->

<!--   if (!is.null(split_by)) { -->
<!--     pl <- pl + facet_wrap(split_by~.) -->
<!--   } -->
<!--   if (!is.null(filter_alpha)) { -->
<!--     pl <- pl + -->
<!--       scale_color_manual(values = 'grey', label = paste("SpatialFDR >", round(filter_alpha, 2))) + -->
<!--       guides(colour = guide_legend( -->
<!--         '', -->
<!--         override.aes = list( -->
<!--           shape = 21, -->
<!--           colour = "black", -->
<!--           fill = "grey50", -->
<!--           size = pt_size, -->
<!--           alpha = 1, -->
<!--           stroke = 0.1 -->
<!--         ) -->
<!--       )) -->
<!--   } else { -->
<!--     pl <- pl + -->
<!--       scale_color_manual(values = 'grey') + -->
<!--       guides(color="none") -->
<!--   } -->

<!--   pl <- pl + -->
<!--     theme_classic(base_size = 16) + -->
<!--     theme( -->
<!--       axis.ticks = element_blank(), -->
<!--       axis.text = element_blank(), -->
<!--       plot.title = element_text(hjust = 0.5) -->
<!--     ) -->
<!-- pl -->
<!-- ``` -->

<!-- ```{r} -->
<!-- endo_milo <- scater::runPCA(endo_milo, subset_row=hvgs) -->
<!-- ``` -->



<!-- --- -->
<!-- ## Old (before I got dataset from authors) -->

<!-- Using data from [Ramachandran et al. 2019](https://www.nature.com/articles/s41586-019-1631-3#Sec1) (GEO accessiion: GSE136103).  -->

<!-- ```{r} -->
<!-- human_files <- list.files("~/Downloads/GSE136103_RAW", pattern="GSM40411.._healthy|cirrhotic", full.names = TRUE)  -->

<!-- prefixes <- str_remove(human_files, "barcodes.tsv.gz|genes.tsv.gz|matrix.mtx.gz") %>% -->
<!--   # str_remove(".+/") %>% -->
<!--   unique()  -->

<!-- sce_ls <- lapply(prefixes, function(x) read10xCounts(x, type="prefix")) -->
<!-- liver_sce <- purrr::reduce(sce_ls, cbind) -->

<!-- ## Make colData info -->
<!-- new_coldata <- colData(liver_sce) %>% -->
<!--   data.frame() %>% -->
<!--   mutate(Sample=str_remove(Sample, ".+/") %>% str_remove("_$")) %>% -->
<!--   separate(Sample, into=c("col1", "Patient", "Sort"), sep = "_", remove=FALSE) %>%  -->
<!--   mutate(Condition=str_remove(Patient, ".$")) %>% -->
<!--   select(-col1) %>% -->
<!--   mutate(Cell_id = str_c(Sample, "_",Barcode)) %>% -->
<!--   column_to_rownames("Cell_id") -->

<!-- colnames(liver_sce) <- rownames(new_coldata) -->
<!-- colData(liver_sce) <- DataFrame(new_coldata) -->

<!-- # saveRDS(liver_sce, "~/GSE136103_SingleCellExperiment.RDS") -->
<!-- ``` -->
<!-- ```{r} -->
<!-- liver_sce <- readRDS("~/GSE136103_SingleCellExperiment.RDS") -->
<!-- ``` -->

<!-- QC metrics show that the outlier cells are already filtered (following [this](https://osca.bioconductor.org/overview.html#data-processing-and-downstream-analysis)) -->

<!-- ```{r, fig.width=10,fig.height=8} -->
<!-- # is.mito <- grepl("^MT-", rownames(liver_sce)) -->
<!-- qcstats <- perCellQCMetrics(liver_sce) -->

<!-- colData(liver_sce) <- cbind(colData(liver_sce), qcstats) -->

<!-- plotColData(liver_sce, x="Sample", y = "total", other_fields = "Condition") + -->
<!--   scale_y_log10() + -->
<!--   facet_wrap(~Condition, scales = "free") + -->
<!--   ggtitle("total counts")  -->

<!-- plotColData(liver_sce, x="Sample", y = "detected", other_fields = "Condition") + -->
<!--   facet_wrap(~Condition, scales = "free") + -->
<!--   ggtitle("Detected genes")  -->

<!-- ``` -->

<!-- ### Normalization -->

<!-- ```{r} -->
<!-- lib_sf <- librarySizeFactors(liver_sce) -->
<!-- hist(log10(lib_sf), xlab="Log10[Size factor]", col='grey80') -->
<!-- ``` -->
<!-- ```{r} -->
<!-- set.seed(100) -->
<!-- liver_sce <- logNormCounts(liver_sce) -->
<!-- assayNames(liver_sce) -->
<!-- ``` -->

<!-- ### Feature selection -->

<!-- ```{r} -->
<!-- library(scran) -->
<!-- dec_liver <- modelGeneVar(liver_sce) -->

<!-- # Visualizing the fit: -->
<!-- fit_liver <- metadata(dec_liver) -->
<!-- plot(fit_liver$mean, fit_liver$var, xlab="Mean of log-expression", -->
<!--     ylab="Variance of log-expression") -->

<!-- hvgs <- getTopHVGs(dec_liver, n=3000) -->
<!-- ``` -->



<!-- ### Dim reduction -->

<!-- ```{r, fig.height=14, fig.width=14} -->
<!-- liver_sce <- scater::runPCA(liver_sce, subset_row=hvgs, ncomponents=30) -->
<!-- reducedDim(liver_sce, "PCA") <- reducedDim(liver_sce, "PCA")[,1:11] -->
<!-- plotPCA(liver_sce, colour_by="Condition", ncomponents=3) -->
<!-- ``` -->

<!-- ```{r, fig.height=8, fig.width=8} -->
<!-- liver_sce <- runUMAP(liver_sce, dimred="PCA", ncomponents=2) -->

<!-- scater::plotUMAP(liver_sce, colour_by="Condition", point_alpha=1,  point_size=0.8)  -->
<!-- scater::plotUMAP(liver_sce, colour_by="Sort", point_alpha=0.3,  point_size=0.5) -->
<!-- scater::plotUMAP(liver_sce, colour_by="Patient", point_alpha=0.3,  point_size=0.5) -->
<!-- ``` -->
<!-- ```{r, fig.height=10, fig.width=10} -->
<!-- rownames(liver_sce) <- rowData(liver_sce)$Symbol -->
<!-- scater::plotUMAP(liver_sce, colour_by="CD3D", point_alpha=0.3, point_size=0.5) -->
<!-- ``` -->



